From 4173c552df436018faba5ccfac5a28e11bdba4fc Mon Sep 17 00:00:00 2001 From: oliskoli Date: Wed, 4 Jul 2007 21:07:28 +0000 Subject: [PATCH] Re-use unused decode vector with a pointer to a fallback character set. Very useful for ascii-like character sets. There we don't need to maintain a large extra (hand-made) conversion table. --- cet.c | 9 ++++++-- cet.h | 4 ++-- cet_util.c | 36 +++++++++++++++++++++-------- reference/cet/cet-sample.latin1.txt | 10 ++++---- 4 files changed, 41 insertions(+), 18 deletions(-) diff --git a/cet.c b/cet.c index 9557d9aea..e003e57c5 100644 --- a/cet.c +++ b/cet.c @@ -235,8 +235,12 @@ cet_ucs4_to_char(const int value, const cet_cs_vec_t *vec) if (value < vec->ucs4_offset + vec->ucs4_count) return (char)value & 0xFF; - else - return CET_NOT_CONVERTABLE_DEFAULT; + else { + if (vec->fallback && (vec->fallback != vec)) + return cet_ucs4_to_char(value, vec->fallback); + else + return CET_NOT_CONVERTABLE_DEFAULT; + } } /* %%% cet_utf8_to_char %%% @@ -274,6 +278,7 @@ cet_str_utf8_to_any(const char *src, const cet_cs_vec_t *vec) char *res, *dest, *cend; if (c == NULL) return NULL; + if (vec->ucs4_count == 0) return xstrdup(src); /* UTF-8 -> UTF-8 */ len = strlen(c); res = dest = xmalloc(len + 1); /* target will become smaller or equal length */ diff --git a/cet.h b/cet.h index bdc2c114e..6b32f1edc 100644 --- a/cet.h +++ b/cet.h @@ -38,8 +38,8 @@ typedef struct cet_cs_vec_s { const char *name; /* name of character set */ const char **alias; /* alias table */ - int (*decode)(const char *, int *); /* ... to UCS-4 decoder !FUTURE! */ - short (*encode)(const int); /* UCS-4 to ... encoder !FUTURE! 
*/ + struct cet_cs_vec_s *fallback; /* fallback character set */ + void *unused; const int *ucs4_map; /* char to UCS-4 value table */ const int ucs4_offset; /* first non standard character */ const int ucs4_count; /* values in table */ diff --git a/cet_util.c b/cet_util.c index 8b366110f..19e76f24d 100644 --- a/cet_util.c +++ b/cet_util.c @@ -49,9 +49,15 @@ static int cet_cs_alias_ct = 0; static int cet_cs_vec_ct = 0; static int cet_output = 0; -/* %%% short hand strings transmission for main character sets %%% */ +/* %%% fixed built-in character sets %%% */ +#include "cet/ansi_x3_4_1968.h" #include "cet/iso_8859_1.h" +#include "cet/iso_8859_15.h" +#include "cet/cp1252.h" + +/* %%% short hand strings transmission for main character sets %%% */ + char * cet_str_utf8_to_iso8859_1(const char *src) { @@ -64,8 +70,6 @@ cet_str_iso8859_1_to_utf8(const char *src) return cet_str_any_to_utf8(src, &cet_cs_vec_iso_8859_1); } -#include "cet/iso_8859_15.h" - char * cet_str_utf8_to_iso8859_15(const char *src) { @@ -78,8 +82,6 @@ cet_str_iso8859_15_to_utf8(const char *src) return cet_str_any_to_utf8(src, &cet_cs_vec_iso_8859_15); } -#include "cet/ansi_x3_4_1968.h" - char * cet_str_utf8_to_us_ascii(const char *src) { @@ -92,8 +94,6 @@ cet_str_us_ascii_to_utf8(const char *src) return cet_str_any_to_utf8(src, &cet_cs_vec_ansi_x3_4_1968); } -#include "cet/cp1252.h" - char * cet_str_utf8_to_cp1252(const char *src) { @@ -333,7 +333,7 @@ void cet_check_cs(cet_cs_vec_t *vec) /* test well sorted link & extra tables */ { cet_ucs4_link_t *link; - + if ((link = (cet_ucs4_link_t *)vec->ucs4_link)) { int i, j; @@ -810,10 +810,28 @@ cet_register_cs(&cet_cs_vec_vps); qsort(list, c, sizeof(*list), cet_cs_alias_qsort_cb); cet_cs_alias = list; cet_cs_alias_ct = c; + + /* install fallback for ascii-like (first 128 ch.) 
character sets */ + for (i = 1250; i <= 1258; i++) { + char name[16]; + cet_cs_vec_t *vec; + + snprintf(name, sizeof(name), "WIN-CP%d", i); + if ((vec = cet_find_cs_by_name(name))) + vec->fallback = &cet_cs_vec_ansi_x3_4_1968; + } + for (i = 1; i <= 15; i++) { + char name[16]; + cet_cs_vec_t *vec; + + snprintf(name, sizeof(name), "ISO-8859-%d", i); + if ((vec = cet_find_cs_by_name(name))) + vec->fallback = &cet_cs_vec_ansi_x3_4_1968; + } + } #ifdef CET_DEBUG printf("We have registered %d character sets with %d aliases\n", cet_cs_vec_ct, cet_cs_alias_ct); #endif - } } cet_cs_vec_t * diff --git a/reference/cet/cet-sample.latin1.txt b/reference/cet/cet-sample.latin1.txt index 80efa7d23..477ca12be 100644 --- a/reference/cet/cet-sample.latin1.txt +++ b/reference/cet/cet-sample.latin1.txt @@ -1,17 +1,17 @@ Group sID sDescription fLat fLong fEasting fNorthing fAlt iColour iSymbol sHyperLink new 01 Hohndorf 50.738297 12.683029 -99999999.00 255 1 -new 0x9E $ - latin small letter z with caron 50.497971 13.027725 -99999999.00 255 1 +new 0x9E z - latin small letter z with caron 50.497971 13.027725 -99999999.00 255 1 new 0xC9 É - latin capital letter e with circumflex 50.497971 13.027725 -99999999.00 255 1 new 0xF0 ð - latin small letter eth (icelandic) 50.497971 13.027725 -99999999.00 255 1 -new CS Ovládací Prohlí$ení lokální síte 50.514406 13.638634 -99999999.00 255 1 +new CS Ovládací Prohlízení lokální síte 50.514406 13.638634 -99999999.00 255 1 new DA Tåning netværkssøgning áâãäåæéë 56.011734 9.847870 -99999999.00 255 1 new DE Himmelmühle äöüÄÖÜß 50.625865 13.060611 -99999999.00 255 1 new EO Trasercado de la loka ret 50.495281 13.027645 -99999999.00 255 1 new ES Matalascañas Navegación Táliga 37.007446 -6.558838 -99999999.00 255 1 new FR Boissière-École Contrôle réseau Mâle 48.679047 1.652069 -99999999.00 255 1 -new HR Pregledavanje lokalne mre$e 50.477937 12.510391 -99999999.00 255 1 +new HR Pregledavanje lokalne mreze 50.477937 12.510391 -99999999.00 255 1 new HU Hõgyész 
Vezérlõközpont Hálózat 46.491394 18.424072 -99999999.00 255 1 new IS Borgarfjörður 65.522461 -13.823547 -99999999.00 255 1 new SK Ovládacie centrum 50.724214 13.524871 -99999999.00 255 1 -new X1 $ðÉéÀàÈèÙùÂâÊêÎîÄäËëÖöÜüÆæÅ娸ÇçÑñ²³½É× 50.497971 13.027725 -99999999.00 255 1 -new X2 $ðÉéÀàÈèÙùÂâÊêÎîÄäËëÖöÜüÆæÅ娸ÇçÑñ²³½É× 50.497971 13.027725 -99999999.00 255 1 +new X1 zðÉéÀàÈèÙùÂâÊêÎîÄäËëÖöÜüÆæÅ娸ÇçÑñ²³½É× 50.497971 13.027725 -99999999.00 255 1 +new X2 zðÉéÀàÈèÙùÂâÊêÎîÄäËëÖöÜüÆæÅ娸ÇçÑñ²³½É× 50.497971 13.027725 -99999999.00 255 1 -- 2.30.2